# Reads excel files directly
library(readxl)
## Warning: package 'readxl' was built under R version 4.1.2
# PLS-DA with this
library(mixOmics)
## Loading required package: MASS
## Warning: package 'MASS' was built under R version 4.1.2
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.1.2
##
## Loaded mixOmics 6.16.3
## Thank you for using mixOmics!
## Tutorials: http://mixomics.org
## Bookdown vignette: https://mixomicsteam.github.io/Bookdown
## Questions, issues: Follow the prompts at http://mixomics.org/contact-us
## Cite us: citation('mixOmics')
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## Warning: package 'tibble' was built under R version 4.1.2
## Warning: package 'tidyr' was built under R version 4.1.2
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'purrr' was built under R version 4.1.2
## Warning: package 'dplyr' was built under R version 4.1.2
## Warning: package 'stringr' was built under R version 4.1.2
## Warning: package 'forcats' was built under R version 4.1.2
## Warning: package 'lubridate' was built under R version 4.1.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.0 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.1.8
## ✔ purrr 1.0.1 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::map() masks mixOmics::map()
## ✖ dplyr::select() masks MASS::select()
## ℹ Use the conflicted package (http://conflicted.r-lib.org/) to force all conflicts to become errors
library(tidymodels)
## Warning: package 'tidymodels' was built under R version 4.1.2
## ── Attaching packages ────────────────────────────────────── tidymodels 1.0.0 ──
## ✔ broom 1.0.3 ✔ rsample 1.1.1
## ✔ dials 1.1.0 ✔ tune 1.0.1
## ✔ infer 1.0.4 ✔ workflows 1.1.3
## ✔ modeldata 1.1.0 ✔ workflowsets 1.0.0
## ✔ parsnip 1.0.4 ✔ yardstick 1.1.0
## ✔ recipes 1.0.5
## Warning: package 'broom' was built under R version 4.1.2
## Warning: package 'dials' was built under R version 4.1.2
## Warning: package 'scales' was built under R version 4.1.2
## Warning: package 'infer' was built under R version 4.1.2
## Warning: package 'modeldata' was built under R version 4.1.2
## Warning: package 'parsnip' was built under R version 4.1.2
## Warning: package 'recipes' was built under R version 4.1.2
## Warning: package 'rsample' was built under R version 4.1.2
## Warning: package 'tune' was built under R version 4.1.2
## Warning: package 'workflows' was built under R version 4.1.2
## Warning: package 'workflowsets' was built under R version 4.1.2
## Warning: package 'yardstick' was built under R version 4.1.2
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ scales::discard() masks purrr::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ recipes::fixed() masks stringr::fixed()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::map() masks mixOmics::map()
## ✖ parsnip::pls() masks mixOmics::pls()
## ✖ dplyr::select() masks MASS::select()
## ✖ yardstick::spec() masks readr::spec()
## ✖ recipes::step() masks stats::step()
## ✖ tune::tune() masks parsnip::tune(), mixOmics::tune()
## • Search for functions across packages at https://www.tidymodels.org/find/
# Data wrangling
library(dplyr)
# Normalize data with this
library(NormalyzerDE)
# median imputation
library(missMethods)
## Warning: package 'missMethods' was built under R version 4.1.2
# Reproducible runs (imputation order / model internals)
set.seed(69)

# Read every lipid-class sheet from the one workbook. Each sheet becomes a
# data frame named <class>_excel (CE_excel, CER_excel, ..., TAG_excel) so the
# downstream per-class code keeps working unchanged.
species_file <- "lipidomics_species_concentrations.xlsx"
lipid_classes <- c("CE", "CER", "DAG", "DCER", "FFA", "HCER", "LCER",
                   "LPC", "LPE", "MAG", "PC", "PE", "PI", "SM", "TAG")
for (cls in lipid_classes) {
  assign(paste0(cls, "_excel"), read_excel(species_file, sheet = cls))
}

# Outcome: first 40 rows are baseline ("BL"), next 40 post-intervention ("AI")
# -- assumes the sheets are sorted that way; TODO confirm against the workbook.
Y <- gl(2, 40, labels = c("BL", "AI"))

# Check class balance (expected: BL = 40, AI = 40)
summary(Y)
# Per-class preprocessing, applied identically to all 15 lipid classes:
#   1. Drop the metadata columns (ID, group, gender_group); keep the raw
#      concentration matrix as <class>_species for later reference.
#   2. Remove lipid species (columns) with more than 30% missing values,
#      i.e. keep only columns observed in > 70% of samples.
#      (The original comment said "more than 50% NA" but the code has always
#      used a 0.7 observed-fraction cutoff; the comment is corrected here.)
#   3. Impute each remaining NA with that COLUMN's minimum observed value --
#      a common stand-in for below-detection-limit measurements.
# Results are stored as tibbles named X_<class> (X_CE, ..., X_TAG).

# Drop columns observed in <= 70% of rows, then min-impute the rest per column.
min_impute_columns <- function(m) {
  # drop = FALSE keeps a one-column result as a matrix instead of a vector
  kept <- m[, colMeans(!is.na(m)) > 0.7, drop = FALSE]
  out <- as_tibble(kept)
  out[] <- lapply(out, function(x) replace(x, is.na(x), min(x, na.rm = TRUE)))
  out
}

lipid_classes <- c("CE", "CER", "DAG", "DCER", "FFA", "HCER", "LCER",
                   "LPC", "LPE", "MAG", "PC", "PE", "PI", "SM", "TAG")
for (cls in lipid_classes) {
  raw <- as.matrix(subset(get(paste0(cls, "_excel")),
                          select = -c(ID, group, gender_group)))
  assign(paste0(cls, "_species"), raw)          # unfiltered matrix, kept as-is
  assign(paste0("X_", cls), min_impute_columns(raw))
}
# Convert each imputed tibble back to a numeric matrix, then apply per-sample
# median normalization. Data are plain concentrations (not log-transformed),
# hence noLogTransform = TRUE.
# CE, PC and TAG are left UN-normalized: their medianNormalization() calls
# were commented out in the original analysis -- presumably deliberate;
# TODO confirm with the analysis author before changing.
lipid_classes <- c("CE", "CER", "DAG", "DCER", "FFA", "HCER", "LCER",
                   "LPC", "LPE", "MAG", "PC", "PE", "PI", "SM", "TAG")
unnormalized_classes <- c("CE", "PC", "TAG")
for (cls in lipid_classes) {
  x <- as.matrix(get(paste0("X_", cls)))
  if (!(cls %in% unnormalized_classes)) {
    x <- medianNormalization(x, noLogTransform = TRUE)
  }
  assign(paste0("X_", cls), x)
}
# Fit one PLS-DA model per lipid class (up to 10 components), stored as
# <class>.plsda (CE.plsda, ..., TAG.plsda).
# For classes with fewer than 10 species (the original run warned for CER = 9,
# DCER = 6, HCER = 7, LCER = 7, MAG = 6 columns), mixOmics resets ncomp to
# ncol(X) with a warning rather than an error -- expected and harmless.
lipid_classes <- c("CE", "CER", "DAG", "DCER", "FFA", "HCER", "LCER",
                   "LPC", "LPE", "MAG", "PC", "PE", "PI", "SM", "TAG")
for (cls in lipid_classes) {
  assign(paste0(cls, ".plsda"),
         plsda(get(paste0("X_", cls)), Y, ncomp = 10))
}
# For each fitted model: plot the samples projected onto the first two PLS-DA
# components (coloured by class, with 95% confidence ellipses and a legend),
# then compute max.dist decision boundaries between classes.
# NOTE(review): `background` is overwritten on every iteration and never
# passed to a plot, so only the TAG boundary survives the loop -- confirm
# whether these were meant to be supplied via plotIndiv(background = ...).
# Plot order reproduces the original script exactly (PI comes before PE).
plot_order <- c("CE", "CER", "DAG", "DCER", "FFA", "HCER", "LCER",
                "LPC", "LPE", "MAG", "PC", "PI", "PE", "SM", "TAG")
for (cls in plot_order) {
  model <- get(paste0(cls, ".plsda"))
  # samples on components 1-2 of the PLS-DA subspace
  plotIndiv(model, comp = 1:2,
            group = Y, ind.names = FALSE,  # colour points by class
            ellipse = TRUE,                # 95% confidence ellipse per class
            legend = TRUE, title = cls)
  # max.dist decision boundary for components 1-2
  background <- background.predict(model, comp.predicted = 2,
                                   dist = "max.dist")
}